1 Core genome MLST schema and quality control

The E. coli scheme from Enterobase [1] was used with chewBBACA [2] to get the cgMLST profile for each isolate. The number of missing loci in total (sum of LNF, NIPH, PLOT, ALM and ASM from chewBBACA) is plotted below for all isolates.

# Vector of excluded samples
ex_samples <- c(
  "2016-17-565-1-S",
  "2016-02-415-2-S",
  "2016-02-428-2-S",
  "2016-02-486-2-S",
  "2016-02-732-2-S",
  "2014-01-1741-1-S"
  )

# Collapses data frame to unique lines while ignoring NA's
func_paste <- function(x) paste(unique(x[!is.na(x)]), collapse = ", ")

"%not_in%" <- Negate("%in%")


# Isolate data
id_data <- read.table("../data/id.txt",
                      sep = "\t",
                      header = TRUE,
                      stringsAsFactors = F)

isolate_data <- read.table("../data/isolate_data.txt",
                           sep = "\t",
                           header = T,
                           stringsAsFactors = F) %>%
  filter(id %not_in% ex_samples)


# cgMLST data
cgmlst_stats <- read.table("../data/chewbbaca/complete_results/mdata_stats.tsv",
                         sep = "\t",
                         header = TRUE,
                         stringsAsFactors = FALSE) %>%
  filter(FILE != "2016-02-415_S208_pilon_spades.fasta")

detailed_cgmlst_stats <- read.table("../data/chewbbaca/results_statistics.tsv",
                                    sep = "\t",
                                    header = TRUE,
                                    stringsAsFactors = FALSE) %>%
  filter(Genome != "2016-02-415_S208_pilon_spades.fasta")

cgMLST_allele_data <- read.table("../data/chewbbaca/complete_results/cgMLST.tsv",
                              sep = "\t",
                              header = TRUE,
                              stringsAsFactors = FALSE) %>%
  mutate(FILE = sub("_pilon_spades.fasta", "", FILE)) %>%
  dplyr::rename("ref" = FILE) %>%
  left_join(id_data, by = "ref") %>%
  select(id, everything(), -ref) %>%
  filter(id %not_in% ex_samples)
boxplot(cgmlst_stats$number.of.missing.data,
        ylab = "Number of missing loci")

Below follows the detailed results for all loci in the cgMLST scheme. EXC = excact matches, INF = novel allele, LNF = locus not found, NIPH = paralogous locus, PLOT = possible loci on tip of contig, ALM = allele 20 % larger than reference, ASM = allele 20 % smaller than reference. The numbers below each boxplot denote the mean.

test <- detailed_cgmlst_stats %>%
  gather(metric, value, -Genome) %>%
  group_by(metric) %>%
  mutate(mean_val = round(mean(value), 1))

ggplot(test, aes(metric, value)) +
  stat_boxplot(geom = "errorbar", width = 0.5) +
  geom_boxplot() +
  geom_text(data = subset(test,
                          Genome == "2006-01-2184_S30_pilon_spades.fasta"),
            aes(label = mean_val),
            y = -60,
            size = 3.8) +
  scale_x_discrete(limits = c("EXC",
                              "INF",
                              "LNF",
                              "NIPH",
                              "PLOT",
                              "ALM",
                              "ASM")) +
  labs(y = "Number of loci") +
  theme(axis.title.x = element_blank(),
        axis.ticks.x = element_blank())

2 Total dendrogram of all isolates

Genetic distances were calculated from the allele data from the cgMLST report, and a dendrogram was drawn from these distances. The annotations are based on data from the ARIBA resistance gene analysis.

cgMLST_data <- read.table("../data/chewbbaca/complete_results/clean_cgMLST_data.txt",
                   sep = "\t",
                   header = TRUE,
                   colClasses = "factor")

total_tree <- as.phylo(hclust(daisy(cgMLST_data, metric = "gower"), method = "average"))


tree_metadata <- read.table("../data/chewbbaca/complete_results/tree_metadata.txt",
                            sep = "\t",
                            header = TRUE)

tree_heatmap_data <- read.table("../data/chewbbaca/complete_results/tree_heatmap_data.txt",
                            sep = "\t",
                            header = TRUE) %>%
  select(-gadX)

names(tree_heatmap_data) <- c("GyrA","ParC","ParE","RpoB","MarA","MarR","SoxR","qepA4","qnr")

palette <- c("Broiler" = "#4575b4",
             "Pig" = "#74add1",
             "Red fox" = "#f46d43",
             "Wild bird" = "#fdae61")

palette2 <- c("1-A" = "#fc8d59",
              "1-I" = "#80b1d3",
              "0" = "grey95")

total_tree_annotated <- ggtree(total_tree,
                               layout = "circular",
                               color = "#808080",
                               size = 0.5) %<+% tree_metadata +
                               geom_tippoint(aes(color = species),
                               size = 3) +
                               geom_tiplab2(aes(label = ST),
                               offset = 0.01,
                               size = 4) +
  scale_color_manual(values = palette)

total_tree_opened <- rotate_tree(open_tree(total_tree_annotated, 8), 93)

gheatmap(total_tree_opened,
         tree_heatmap_data,
         offset = 0.04,
         width = 0.3,
         colnames_offset_y = 2.5,
         colnames_position = "top",
         font.size = 5) +
  scale_fill_manual(values = palette2,
                    labels = c("Gene/Mutation absent",
                               "Acquired gene present",
                               "Intrinsic gene mutated")) +
  theme(legend.position = c(0.9,0.9),
        legend.text = element_text(size = 20))

3 Dendrograms per animal species

metadata_species <- read.table("../data/chewbbaca/complete_results/metadata_species_specific_trees.txt",
                               sep = "\t",
                               header = TRUE) %>%
  select(gyrA, parC, parE, PMQR) %>%
  dplyr::rename("GyrA" = gyrA,
         "ParC" = parC,
         "ParE" = parE)

palette3 <- c("0" = "grey95",
              # gyrA
              "S83L" = "#c6dbef",
              "S83A" = "#9ecae1",
              "D87N" = "#6baed6",
              "D87H" = "#4292c6",
              "D87Y" = "#2171b5",
              "D87G" = "#08519c",
              "S83L, D87N" = "#08306b",
              # parC
              "S80I" = "#a1d99b",
              "S80R" = "#74c476",
              "S57T" = "#41ab5d",
              "A56T, S80I" = "#238b45",
              "S80I, E84V" = "#005a32",
              # parE
              "D475E" = "#fdd0a2",
              "D463N" = "#fdae6b",
              "S458A" = "#fd8d3c",
              "L416F" = "#f16913",
              "H516Y" = "#d94801",
              "L488M, A512T" = "#8c2d04",
              # PMQR
              "qnrA1" = "#dadaeb",
              "qnrB19" = "#bcbddc",
              "qnrS1" = "#9e9ac8",
              "qnrS2" = "#807dba",
              "qnrS4" = "#6a51a3",
              "qepA4" = "#4a1486")

# Broiler tree

broiler_tree <- calc_tree("../data/chewbbaca/complete_results/cgMLST_broiler.tsv")

broiler_tree_annotated <- ggtree(broiler_tree,
                                 layout = "circular",
                                 color = "#808080",
                                 size = 0.5) %<+% tree_metadata +
  geom_tippoint(color = "#4575b4",
                size = 4) +
  geom_tiplab2(aes(label = ST),
               offset = 0.02,
               size = 6)

broiler_complete <- gheatmap(broiler_tree_annotated,
         metadata_species,
         offset = 0.1,
         width = 0.5,
         colnames = FALSE) +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)

# Pig tree
pig_tree <- calc_tree("../data/chewbbaca/complete_results/cgMLST_pig.tsv")

pig_tree_annotated <- ggtree(pig_tree,
                                 layout = "circular",
                                 color = "#808080",
                                 size = 0.5) %<+% tree_metadata +
  geom_tippoint(color = "#74add1",
                size = 4) +
  geom_tiplab2(aes(label = ST),
               offset = 0.02,
               size = 5.5)

pig_complete <- gheatmap(pig_tree_annotated,
         metadata_species,
         offset = 0.09,
         width = 0.5,
         colnames = FALSE) +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)

# Red fox tree
fox_tree <- calc_tree("../data/chewbbaca/complete_results/cgMLST_fox.tsv")

fox_tree_annotated <- ggtree(fox_tree,
                                 layout = "circular",
                                 color = "#808080",
                                 size = 0.5) %<+% tree_metadata +
  geom_tippoint(color = "#f46d43",
                size = 4) +
  geom_tiplab2(aes(label = ST),
               offset = 0.02,
               size = 5.5)

fox_complete <- gheatmap(fox_tree_annotated,
         metadata_species,
         offset = 0.09,
         width = 0.5,
         colnames = FALSE) +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)

# Wild bird tree
bird_tree <- calc_tree("../data/chewbbaca/complete_results/cgMLST_bird.tsv")

bird_tree_annotated <- ggtree(bird_tree,
                                 layout = "circular",
                                 color = "#808080",
                                 size = 0.5) %<+% tree_metadata +
  geom_tippoint(color = "#fdae61",
                size = 4) +
  geom_tiplab2(aes(label = ST),
               offset = 0.02,
               size = 5.5)

bird_complete <- gheatmap(bird_tree_annotated,
         metadata_species,
         offset = 0.09,
         width = 0.5,
         colnames = FALSE) +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)

# Total plot
tot_plot <- plot_grid(broiler_complete,
          pig_complete,
          fox_complete,
          bird_complete,
          nrow = 2,
          ncol = 2,
          align = "h",
          labels = c("Broiler","Pig","Red fox","Wild bird"))

include_graphics("../figures/species_dendrogram2.png")

4 Minimum spanning tree

A minimum spanning tree calculated from the cgMLST data with GrapeTree [3] using MSTreeV2. Number of allele differences are denoted as numbers on branches between nodes. The branches have been collapsed on ten differences or less. The number inside each node is the sequence type based on the Warwick 7-gene MLST scheme [4].

include_graphics("../figures/GrapeTree_cgMLST.png")

5 Statistics

5.1 Distribution of calculated distances per animal species

Below follows a plot of the distribution of the lower triangular distances for each animal species. A bimodal distribution is present in all four plots. The filtering values of < 0.1 and > 0.8 is used for downstream statistics to acertain whether the broiler isolates are more homogenous than the isolates from other species.

# calculates distances from cgMLST data and returns
# the lower triangular of the matrix, with (diag = FALSE)
# or without (diag = TRUE) the diagonal
lower_tri_dist_calc <- function(data, diag = TRUE, data_frame = FALSE) {
  dist <- as.matrix(daisy(data, metric = "gower"))
  if (diag == TRUE) {
    dist[upper.tri(dist, diag = TRUE)] <- NA
  } else {
    dist[upper.tri(dist, diag = FALSE)] <- NA
  }
  dist_vec <- as.vector(as.matrix(dist))
  dist_compl <- dist_vec[!is.na(dist_vec)]
  
  if (data_frame == TRUE) {
    dist_compl <- as.data.frame(as.matrix(dist))
  }
  return(dist_compl)
}

broiler_cgMLST <- read.table("../data/chewbbaca/complete_results/cgMLST_broiler.tsv",
                           sep = "\t",
                           header = TRUE,
                           colClasses = "factor",
                           row.names = 1)

pig_cgMLST <- read.table("../data/chewbbaca/complete_results/cgMLST_pig.tsv",
                           sep = "\t",
                           header = TRUE,
                           colClasses = "factor",
                           row.names = 1)

fox_cgMLST <- read.table("../data/chewbbaca/complete_results/cgMLST_fox.tsv",
                           sep = "\t",
                           header = TRUE,
                           colClasses = "factor",
                           row.names = 1)

bird_cgMLST <- read.table("../data/chewbbaca/complete_results/cgMLST_bird.tsv",
                           sep = "\t",
                           header = TRUE,
                           colClasses = "factor",
                           row.names = 1)

broiler_dist <- lower_tri_dist_calc(broiler_cgMLST)
pig_dist <- lower_tri_dist_calc(pig_cgMLST)
fox_dist <- lower_tri_dist_calc(fox_cgMLST)
bird_dist <- lower_tri_dist_calc(bird_cgMLST)


# Distance plots
par(mfrow = c(2,2))

x1 <- hist(broiler_dist,
     breaks = 20,
     plot = FALSE)

x1$density = x1$counts/sum(x1$counts)*100

plot(x1,
     freq = FALSE,
     main = "Broiler distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))


x2 <- hist(pig_dist,
     breaks = 20,
     plot = FALSE)

x2$density = x2$counts/sum(x2$counts)*100

plot(x2,
     freq = FALSE,
     main = "Pig distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))

x3 <- hist(fox_dist,
     breaks = 20,
     plot = FALSE)

x3$density = x3$counts/sum(x3$counts)*100

plot(x3,
     freq = FALSE,
     main = "Red fox distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))


x4 <- hist(bird_dist,
     breaks = 20,
     plot = FALSE)

x4$density = x4$counts/sum(x4$counts)*100

plot(x4,
     freq = FALSE,
     main = "Wild bird distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))

5.2 Distribution of iterated distances

Distribution of percentages of distances < 0.1 or > 0.8. The distribution is based on 1000 iterations of the distance data. Observed values are denoted as arrows for each animal species; dark blue = Broiler, light blue = Pig, red = Red fox, yellow = Wild bird. The mean value is denoted as a vertical black line.

# generate data frame from allele data
alleles <- cgMLST_allele_data %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  group_by(species) %>%
  mutate(n = 1:n(),
         new_id = case_when(species == "Broiler" ~ paste("BR", n, sep = "_"),
                            species == "Pig" ~ paste("PI", n, sep = "_"),
                            species == "Red fox" ~ paste("RF", n, sep = "_"),
                            species == "Wild bird" ~ paste("WB", n, sep = "_"))) %>%
  ungroup() %>%
  select(new_id, everything(), -c(species, id)) %>%
  mutate_all(funs(as.factor)) %>%
  column_to_rownames("new_id")

# generate data frame for iteration analysis
run_df <- lower_tri_dist_calc(alleles, data_frame = TRUE) %>%
  rownames_to_column("id") %>%
  gather(isolate1, value, -id) %>%
  dplyr::rename("isolate2" = id) %>%
  mutate(group1 = substr(isolate1, start = 1, stop = 2),
         group2 = substr(isolate2, start = 1, stop = 2)) %>%
  select(isolate1, group1, isolate2, group2, value) %>%
  filter(is.na(value) == FALSE,
         group1 == group2) %>%
  arrange(group1, group2)

# functions used in iteration analyses

## Calculates the sum of observations lower than dist_value1 and
## bigger than dist_value2, and calculates the percentage of
## observed values for each group
without <- function(df, run, dist_value1 = 0.1, dist_value2 = 0.8) {
  df <- df %>%
    group_by(group1, group2) %>%
    mutate(n = value < dist_value1 | value > dist_value2) %>%
    group_by(group1, group2, n) %>%
    count() %>%
    ungroup() %>%
    mutate(n = ifelse(n == TRUE, "Within", "Without")) %>%
    spread(n, nn) %>%
    mutate(Percent = round(Within / (Within + Without) * 100, 2),
           iter = run)
  return(df)
}

## Calculates the sum of observations between dist_value1 and dist_value2
## and calculates the percentage of observed values for each group
within <- function(df, run, dist_value1 = 0.1, dist_value2 = 0.8) {
  df <- df %>%
    group_by(group1, group2) %>%
    mutate(n = value > dist_value1 & value < dist_value2) %>%
    group_by(group1, group2, n) %>%
    count() %>%
    ungroup() %>%
    mutate(n = ifelse(n == TRUE, "Within", "Without")) %>%
    spread(n, nn) %>%
    mutate(Percent = round(Within / (Within + Without) * 100, 2),
           iter = run)
  return(df)
}

## Calculates the sum of observations below dist_value1
## for each group
below_calc <- function(df, run, dist_value1 = 0.1) {
  df <- df %>%
    group_by(group1, group2) %>%
    mutate(n = value < dist_value1) %>%
    group_by(group1, group2, n) %>%
    count() %>%
    ungroup() %>%
    mutate(n = ifelse(n == TRUE, "Below", "Above")) %>%
    spread(n, nn) %>%
    mutate(Below = ifelse(is.na(Below) == TRUE, 0, Below)) %>%
    mutate(Percent_below = round(Below / (Above + Below) * 100, 2),
           iter = run)
  return(df)
}

## Calculates the sum of observations above dist_value1
## for each group
above_calc <- function(df, run, dist_value1 = 0.9) {
  df <- df %>%
    group_by(group1, group2) %>%
    mutate(n = value > dist_value1) %>%
    group_by(group1, group2, n) %>%
    count() %>%
    ungroup() %>%
    mutate(n = ifelse(n == TRUE, "Above", "Below")) %>%
    spread(n, nn) %>%
    mutate(Above = ifelse(is.na(Above) == TRUE, 0, Above)) %>%
    mutate(Percent_above = round(Above / (Above + Below) * 100, 2),
           iter = run)
  return(df)
}

## Calculates the mean of observations for each group
mean_calc <- function(df, run) {
  df <- df %>%
    group_by(group1, group2) %>%
    mutate(metric = round(mean(value), 2)) %>%
    select(group1, group2, metric) %>%
    summarise_all(funs(func_paste)) %>%
    mutate(iter = run,
           metric = as.numeric(metric))
}

## Calculates the median of observations for each group
median_calc <- function(df, run) {
  df <- df %>%
    group_by(group1, group2) %>%
    mutate(metric = round(median(value), 2)) %>%
    select(group1, group2, metric) %>%
    summarise_all(funs(func_paste)) %>%
    mutate(iter = run,
           metric = as.numeric(metric))
}

## Calculates the quantiles of the observations for each group
quantile_calc <- function(df, run) {
  df <- df %>%
    group_by(group1, group2) %>%
    mutate(quant_0 = quantile(value)[1],
           quant_25 = quantile(value)[2],
           quant_50 = quantile(value)[3],
           quant_75 = quantile(value)[4],
           quant_100 = quantile(value)[5]) %>%
    select(group1, group2, contains("quant")) %>%
    summarise_all(funs(func_paste)) %>%
    mutate_at(vars(contains("quant")),
              funs(as.numeric)) %>%
    mutate(iter = run)
}

# Iteration function
iterate_samples <- function(df, run, method = "mean", dist_value1, dist_value2) {

  # randomize the value column, regardless of groups
  df <- df %>%
    mutate(value = sample(value))

  # run specified function
  if (method == "mean") {
    df <- mean_calc(df, run)
    }

  if (method == "below") {
   df <- below_calc(df, run, dist_value1)
    }

  if (method == "above") {
   df <- above_calc(df, run, dist_value1)
    }

  if (method == "within") {
    df <- within(df, run, dist_value1, dist_value2)
    }

  if (method == "without") {
    df <- without(df, run, dist_value1, dist_value2)
    }
  
  if (method == "median") {
    df <- median_calc(df, run)
    }
  
  if (method == "quantile") {
    df <- quantile_calc(df, run)
  }
    
    return(df)
}

# run iterations on selected data frame
run_iterations <- function(df, runs = 1000, seed = 10, method = "mean", dist_value1, dist_value2) {
  # set seed
  set.seed(seed)
  
  # create output list
  output <- list()
  
  # create expected values
  if (method == "mean") {
    orig <- mean_calc(df, 0)
    }

  if (method == "below") {
   orig <- below_calc(df, 0, dist_value1)
    }

  if (method == "above") {
   orig <- above_calc(df, 0, dist_value1)
    }
  
  if (method == "within") {
    orig <- within(df, 0, dist_value1, dist_value2)
    }

  if (method == "without") {
    orig <- without(df, 0, dist_value1, dist_value2)
    }

  if (method == "median") {
    orig <- median_calc(df, 0)
    }

  if (method == "quantile") {
    orig <- quantile_calc(df, 0)
  }

  output <- c(output, list(orig))
  
  # run iterations
  for (i in 1:runs) {
    result <- iterate_samples(df,
                              i,
                              method = method,
                              dist_value1 = dist_value1,
                              dist_value2 = dist_value2)
    output <- c(output, list(result))
    }
  return(output)
}

# Run functions
summary_stats <- run_df %>%
  mutate(total_mean = round(mean(value), 3)) %>%
  group_by(group1) %>%
  mutate(mean = round(mean(value), 3),
         var = round(var(value), 3)) %>%
  select(group1, total_mean, mean, var) %>%
  summarise_all(funs(func_paste))

complete_data <- run_iterations(run_df,
                                runs = 1000,
                                method = "without",
                                dist_value1 = 0.1,
                                dist_value2 = 0.8) %>%
  bind_rows() %>%
  mutate(Percent = as.numeric(Percent))

lower_data <- run_iterations(run_df,
                             runs = 1000,
                             method = "below",
                             dist_value1 = 0.1) %>%
  bind_rows() %>%
  mutate(Percent_below = as.numeric(Percent_below))

summary_stats
# Plots

ggplot(complete_data, aes(Percent)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.2, color = "black",
                fill = "grey80") +
  stat_function(fun = dnorm,
                args = with(complete_data, c(mean = mean(Percent), sd = sd(Percent))),
                color = "red") +
  geom_segment(aes(x = complete_data$Percent[1],
                   xend = complete_data$Percent[1], y = 0.08, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#4575b4") +
  geom_segment(aes(x = complete_data$Percent[2],
                   xend = complete_data$Percent[2], y = 0.08, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#74add1") +
  geom_segment(aes(x = complete_data$Percent[3],
                   xend = complete_data$Percent[3], y = 0.08, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#f46d43") +
  geom_segment(aes(x = complete_data$Percent[4],
                   xend = complete_data$Percent[4], y = 0.08, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#fdae61") +
  geom_text(label = "p < 0.001",
            x = 83.30,
            y = 0.16,
            angle = 90) +
  geom_vline(xintercept = mean(complete_data$Percent),
             size = 1) +
  labs(x = "Percent (%) distances < 0.1 or > 0.8",
       y = "Density") +
  ggtitle(label = "Distribution of percentages",
          subtitle = "Number of iterations: 1000") +
  theme(plot.title = element_text(hjust = 0))

ggplot(lower_data, aes(Percent_below)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.2, color = "black",
                fill = "grey80") +
  stat_function(fun = dnorm,
                args = with(lower_data, c(mean = mean(Percent_below), sd = sd(Percent_below))),
                color = "red") +
  geom_segment(aes(x = lower_data$Percent_below[1],
                   xend = lower_data$Percent_below[1], y = 0.15, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#4575b4") +
  geom_segment(aes(x = lower_data$Percent_below[2],
                   xend = lower_data$Percent_below[2], y = 0.15, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#74add1") +
  geom_segment(aes(x = lower_data$Percent_below[3],
                   xend = lower_data$Percent_below[3], y = 0.15, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#f46d43") +
  geom_segment(aes(x = lower_data$Percent_below[4],
                   xend = lower_data$Percent_below[4], y = 0.15, yend = 0),
               arrow = arrow(length = unit(0.3, "cm"),
                             type = "closed"),
               color = "#fdae61") +
  geom_text(label = "p < 0.001",
            x = 5.87,
            y = 0.32,
            angle = 90) +
  geom_vline(xintercept = mean(lower_data$Percent_below),
             size = 1) +
  labs(x = "Percent (%) distances < 0.1",
       y = "Density") +
  ggtitle(label = "Distribution of Percentages",
          subtitle = "Number of iterations: 1000") +
  theme(plot.title = element_text(hjust = 0))

6 References

[1] Alikhan N-F et al. A genomic overview of the population structure of Salmonella. Casadesús J, editor. PLOS Genet [Internet]. 2018 Apr 5;14(4):e1007261. Available from: https://dx.plos.org/10.1371/journal.pgen.1007261

[2] Machado MP et al. chewBBACA: A complete suite for gene-by-gene schema creation and strain identification. Microb Genomics. 2018;1–7.

[3] Z Zhou et al. (2018) “GrapeTree: Visualization of core genomic relationships among 100,000 bacterial pathogens”, Genome Res. doi: https://doi.org/10.1101/gr.232397.117

[4] Wirth T et al. (2006) “Sex and virulence in Escherichia coli : an evolutionary perspective”, Molecular Microbiology (2006) 60 (5), 1136–1151, doi: 10.1111/j.1365-2958.2006.05172.x